/***
This do-file constructs the dataset for the reopenings event study and records the list of matched control states
***/

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "State Re-Openings"

* Create required subfolders
cap mkdir "${root}/data/derived/State Re-Openings"
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"


********************************************************************************
**# 1. Load and process data
********************************************************************************

*------------------------------------------------------------------------------*
* Import Affinity data (daily)
*------------------------------------------------------------------------------*
project, uses("${root}/data/web/data/Affinity - State - Daily.csv")
import delimited "${root}/data/web/data/Affinity - State - Daily.csv", clear 

* Clean-up date 
gen date = mdy(month, day, year)
format date %td
sum date 
local affinity_max_date = `r(max)'

keep if date >= mdy(1, 1, 2020)
keep date spend_all statefips
tempfile affinity_state 
save `affinity_state'

*------------------------------------------------------------------------------*
* Import employment (weekly)
*------------------------------------------------------------------------------*
project, uses("${root}/data/web/data/Employment - State - Weekly.csv")
import delimited "${root}/data/web/data/Employment - State - Weekly.csv", clear

** Convert to daily
gen date = mdy(month, day_endofweek, year)
format date %td
bysort statefips (date): gen number_days = date - date[_n-1]
assert number_days == 7 | number_days == .                                      // assert that data is currently weekly
gen dow = dow(date)
assert dow == 5                                                                 // assert that Friday is the end of the week
drop number_days dow month day_endofweek year 

foreach var in emp {
	bysort statefips (date): gen `var'_lead = `var'[_n-1] 
	gen w_diff_`var' = `var' - `var'_lead                                       // week-to-week change (week t - week t-1)
}

sum date
local emp_min_date = `r(min)'
local emp_max_date = `r(max)'

gen date_endofweek = date                                                       // record original dates 

tsset statefips (date)                                        
tsfill                                                                          // convert to daily data    

gen dow = dow(date)                                                             
gen Friday = date - dow + 5                                                     // original dates, without missings created by tsfill
replace Friday = Friday + 7 if dow == 6                                         // Stata weeks are Sun (0) to Sat (6). Paychex weeks are Sat (6) to Fri (5).
format %td Friday 
assert date_endofweek == Friday if !missing(date_endofweek)
assert date <= Friday & !missing(Friday)
drop date_endofweek dow       
                                              
foreach var in emp {      
	                                 
	gegen w_diff_`var'_n = mean(w_diff_`var'), by(statefips Friday)             // fill in missings in the data created by tsfill
	gegen `var'_lead_n = mean(`var'_lead), by(statefips Friday)
	assert w_diff_`var'_n == w_diff_`var' & `var'_lead_n == `var'_lead if date == Friday
	assert missing(w_diff_`var') & missing(`var'_lead) if date != Friday
	
	drop w_diff_`var'
	rename w_diff_`var'_n w_diff_`var'
	
	* Interpolate daily employment in week t as the end-of-week value in week t-1 + the weekly difference between the end-of-week t and end-of-week t-1 values * day of Paychex week / no. of days in Paychex week 
	* In other words, just spread the week-to-week changes evenly over days in the week and assert that this method gives us the original values for Fridays.
	bysort statefips Friday (date): gen `var'_d = `var'_lead_n + w_diff_`var' * (_n / _N)
	assert inrange(`var'_d, `var' - 1E-7, `var' + 1E-7) if date == Friday & !missing(`var') & date != `emp_min_date'
	assert missing(`var'_d) if date == `emp_min_date'
	replace `var'_d = `var' if date == `emp_min_date'
}

keep date emp_d statefips
rename emp_d emp

tempfile emp_state 
save `emp_state'

*------------------------------------------------------------------------------*
* Import Womply (weekly)
*------------------------------------------------------------------------------*
project, uses("${root}/data/web/data/Womply - State - Weekly.csv")
import delimited "${root}/data/web/data/Womply - State - Weekly.csv", clear 

gen sunday = mdy(month, day_endofweek, year)

** Convert to daily
gen date = mdy(month, day_endofweek, year)
format date %td
bysort statefips (date): gen number_days = date - date[_n-1]
assert number_days == 7 | number_days == .                                      // assert that data is currently weekly
gen dow = dow(date)
assert dow == 0                                                                 // assert that Sunday is the end of the week
drop number_days dow month day_endofweek year 

foreach var in merchants_all {
	bysort statefips (date): gen `var'_lead = `var'[_n-1] 
	gen w_diff_`var' = `var' - `var'_lead                                       // week-to-week change (week t - week t-1)
}

sum date
local womply_min_date = `r(min)'
local womply_max_date = `r(max)'

gen date_endofweek = date                                                       // record original dates 

tsset statefips (date)                                        
tsfill                                                                          // convert to daily data    

gen dow = dow(date)                                                             
gen Sunday = date - dow	                                                        // original dates, without missings created by tsfill
replace Sunday = Sunday + 7 if dow != 0                                         // Stata weeks are Sun (0) to Sat (6). Womply weeks are Mon (1) to Sun (0).
format %td Sunday 
assert date_endofweek == Sunday if !missing(date_endofweek)
assert date <= Sunday & !missing(Sunday)
drop date_endofweek dow       
                                              
foreach var in merchants_all {
	                                 
	gegen w_diff_`var'_n = mean(w_diff_`var'), by(statefips Sunday)             // fill in missings in the data created by tsfill
	gegen `var'_lead_n = mean(`var'_lead), by(statefips Sunday)
	assert w_diff_`var'_n == w_diff_`var' & `var'_lead_n == `var'_lead if date == Sunday
	assert missing(w_diff_`var') & missing(`var'_lead) if date != Sunday
	
	drop w_diff_`var'
	rename w_diff_`var'_n w_diff_`var'
	
	* Interpolate daily value in week t as the end-of-week value in week t-1 + the weekly difference between the end-of-week t and end-of-week t-1 values * day of Paychex week / no. of days in Womply week 
	* In other words, just spread the week-to-week changes evenly over days in the week and assert that this method gives us the original values for Sundays.
	bysort statefips Sunday (date): gen `var'_d = `var'_lead_n + w_diff_`var' * (_n / _N)
	assert inrange(`var'_d, `var' - 1E-7, `var' + 1E-7) if date == Sunday & !missing(`var') & date != `womply_min_date'
	assert missing(`var'_d) if date == `womply_min_date'
	replace `var'_d = `var' if date == `womply_min_date'
}

keep statefips date merchants_all_d
rename merchants_all_d merchants_all

tempfile womply_state 
save `womply_state'

*------------------------------------------------------------------------------*
* Import policy dates file 
*------------------------------------------------------------------------------*
project, uses("${root}/data/web_d008872/data/Policy Milestones - State.csv")
import delimited "${root}/data/web_d008872/data/Policy Milestones - State.csv", clear

rename statewide_biz_opened partial_biz_opened
rename statename state_name 

* Transform relevant date variables
foreach var in partial_biz_opened nonessential_biz_closed {
	gen day = substr(`var', 9, 2)
	gen month = substr(`var', 6, 2)
	gen year = substr(`var', 1, 4)
	foreach x in year month day {
		destring `x', replace 
	}
	gen date = mdy(month, day, year) 
	format date %d
	drop `var'
	rename date `var'
	drop day month year 
}

* Get max date in all datasets 
local end_date = max(`affinity_max_date', `emp_max_date', `womply_max_date')

* Expand dataset for merges
local obs_to_create = `end_date' - mdy(1, 1, 2020) + 1
expand(`obs_to_create')
bys statefips: gen date_temp = _n -1
gen date = mdy(1, 1, 2020) + date_temp
format date %td
drop date_temp

* Only keep variables we use below 	
keep state* date partial_biz_opened nonessential_biz_closed

* Merge on combined employment data
merge 1:1 statefips date using `emp_state', assert(1 3) nogen

* Merge on Affinity data 
merge 1:1 statefips date using `affinity_state', assert(1 3) nogen
	
* Merge on Womply data 
merge 1:1 statefips date using `womply_state', assert(1 3) nogen
	
	
********************************************************************************
**# 2. Select control states
********************************************************************************

gisid statefips date

* Reopening dates
local reopening_dates `=mdy(4, 20, 2020)' `=mdy(4, 24, 2020)' `=mdy(4, 27, 2020)' 

* Set list of outcomes 
local outcomes spend_all emp merchants_all 

*---------------------------------------------------------------------------
* Find controls--states similar in levels for each var in preperiod--state x date
*---------------------------------------------------------------------------

* Loop through outcome variables 
foreach date in `reopening_dates' { 
		
	* Eliminate from the control pool any state that reopened within three weeks from the treated state reopening dates 
	gen treat_`date' = (partial_biz_opened <= `date' + 21)
	
	* Loop through re-opening dates
	foreach outcome_var in `outcomes' {
			
		* We match control states to treatment states based on values of the outcome variable 1-3 weeks before the treatment
		gen temp = `outcome_var' if inrange(date, `date' - 21, `date' - 7)
		gegen t3_`outcome_var' = mean(temp), by(statefips)
		drop temp 
			
		* For each state, get its place in the distribution of the pre-treatment outcome variable across states
		* This variable ranges from 0 to 1
		cumul t3_`outcome_var' if date == `date', gen(temp_dist)
		gegen t3_`outcome_var'_dist = mean(temp_dist), by(statefips)
		assert t3_`outcome_var'_dist == temp_dist if !missing(temp_dist)
		drop temp_dist
			
		* For treated states, get their mean rank in the distribution of the pre-treatment outcome variable
		sum t3_`outcome_var'_dist if partial_biz_opened == `date'
		local target = r(mean)
				
		* Define control states as those within 0.2 of the treatment states' mean rank in the distribution of the pre-treatment outcome variable
		gen control_`date'_`outcome_var' = inrange(t3_`outcome_var'_dist, `target' - 0.2, `target' + 0.2) & treat_`date' == 0
		drop t3_`outcome_var' t3_`outcome_var'_dist
		
		* Ensure there are at least 3 control states
		gunique state_name if control_`date'_`outcome_var' == 1
		assert `r(unique)' > 2
	}
}

* Express outcomes in percentage points 
foreach outcome_var in `outcomes' {
	replace `outcome_var' = `outcome_var' * 100
}

save "${root}/data/derived/State Re-Openings/main_dataset updated", replace 
project, creates("${root}/data/derived/State Re-Openings/main_dataset updated.dta")


********************************************************************************
**# 3. Export list of control states
********************************************************************************

gen output_date = . 
gen output_treat = "" 
gen output_affinity_controls = "" 
gen output_emp_controls =  "" 
gen output_womply_controls =  "" 

* Loop over reopening dates 
local j = 1 
foreach date in `reopening_dates' {
	
	* Get list of treatment states 
	glevelsof state_name if partial_biz_opened == `date', local(states) 
	local i = 1 
	foreach state in `states' {
		local proper_state = proper("`state'")
		if `i' == 1 local treated_`date' "`proper_state'"
		else local treated_`date' "`treated_`date'', `proper_state'"
		local i = `i' + 1 
	}

	* Get list of controls 
    foreach var in `outcomes' {
		glevelsof state_name if control_`date'_`var' == 1, local(control_states) 
		local i = 1 
		foreach state in `control_states' {
			local proper_state = proper("`state'")
			if `i' == 1 local control_`date'_`var' "`proper_state'"
			else local control_`date'_`var' "`control_`date'_`var'', `proper_state'"
			local i = `i' + 1 
		}
	}
	 
	 * Keep names  
	 replace output_date = `date' in `j' 
	 replace output_treat = "`treated_`date''" in `j' 
	 replace output_emp_controls = "`control_`date'_emp'" in `j'
	 replace output_affinity_controls = "`control_`date'_spend_all'" in `j'
	 replace output_womply_controls = "`control_`date'_merchants_all'" in `j'

	 local j = `j' + 1 
}

* Export to Excel
keep if !missing(output_date)
format output_date %td  
keep output_date output_treat output_affinity_controls output_emp_controls output_womply_controls

export excel "${root}/results/new_app_table_10.xlsx", sheet("new_app_table_10", replace) firstrow(variables)
project, creates("${root}/results/new_app_table_10.xlsx")
